{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "V100",
      "authorship_tag": "ABX9TyPlHFyW04xkOShGFsrqykJH",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/KevinWang676/Bark-Voice-Cloning/blob/main/Sambert%E4%B8%AD%E6%96%87%E5%A3%B0%E9%9F%B3%E5%85%8B%E9%9A%86v2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# 全新中文声音克隆 Voice Cloning for Chinese Speech"
      ],
      "metadata": {
        "id": "Uhhc4_stcdSf"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 配置环境 Set up"
      ],
      "metadata": {
        "id": "qIFF53SWVDe-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "! nvidia-smi # 需要使用GPU运行"
      ],
      "metadata": {
        "id": "4RZJ1P69VKLU",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "42ee2dbc-c527-472c-bd1f-a5ccfa37f5ae"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Thu Aug 31 07:46:44 2023       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   35C    P0    24W / 300W |      0MiB / 16384MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! pip install openai-whisper\n",
        "! pip install modelscope\n",
        "! pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
        "! pip install typeguard==2.3.1\n",
        "! pip install sox\n",
        "! pip install bitstring\n",
        "! pip install pysptk --no-build-isolation\n",
        "! pip install kantts -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
        "! pip install pytorch_wavelets\n",
        "! pip install tensorboardX\n",
        "! git clone https://github.com/fbcotter/pytorch_wavelets\n",
        "! pip install matplotlib\n",
        "! pip install numpy==1.22.0"
      ],
      "metadata": {
        "id": "s2aAbOEPaVh6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "604b8cc2-6b7a-4185-f14c-c22a51f0b706"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting openai-whisper\n",
            "  Downloading openai-whisper-20230314.tar.gz (792 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m792.9/792.9 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.0)\n",
            "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.56.4)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (1.23.5)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.1+cu118)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (4.66.1)\n",
            "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (10.1.0)\n",
            "Collecting tiktoken==0.3.1 (from openai-whisper)\n",
            "  Downloading tiktoken-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ffmpeg-python==0.2.0 (from openai-whisper)\n",
            "  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)\n",
            "Requirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from ffmpeg-python==0.2.0->openai-whisper) (0.18.3)\n",
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2023.6.3)\n",
            "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2.31.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.27.2)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.12.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (16.0.6)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (67.7.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (4.7.1)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2023.7.22)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper) (1.3.0)\n",
            "Building wheels for collected packages: openai-whisper\n",
            "  Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for openai-whisper: filename=openai_whisper-20230314-py3-none-any.whl size=796907 sha256=3f1f7b29554cb12038ac028a6ee51993ce2ef2958631f12e95b53f3406e33050\n",
            "  Stored in directory: /root/.cache/pip/wheels/b2/13/5f/fe8245f6dc59df505879da4b2129932e342f02a80e6b87f27d\n",
            "Successfully built openai-whisper\n",
            "Installing collected packages: ffmpeg-python, tiktoken, openai-whisper\n",
            "Successfully installed ffmpeg-python-0.2.0 openai-whisper-20230314 tiktoken-0.3.1\n",
            "Collecting modelscope\n",
            "  Downloading modelscope-1.8.4-py3-none-any.whl (4.9 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting addict (from modelscope)\n",
            "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
            "Requirement already satisfied: attrs in /usr/local/lib/python3.10/dist-packages (from modelscope) (23.1.0)\n",
            "Collecting datasets<=2.13.0,>=2.8.0 (from modelscope)\n",
            "  Downloading datasets-2.13.0-py3-none-any.whl (485 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m485.6/485.6 kB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting einops (from modelscope)\n",
            "  Downloading einops-0.6.1-py3-none-any.whl (42 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.2/42.2 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: filelock>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (3.12.2)\n",
            "Requirement already satisfied: gast>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.4.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.23.5)\n",
            "Collecting oss2 (from modelscope)\n",
            "  Downloading oss2-2.18.1.tar.gz (274 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.3/274.3 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.5.3)\n",
            "Requirement already satisfied: Pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (9.4.0)\n",
            "Collecting pyarrow!=9.0.0,>=6.0.0 (from modelscope)\n",
            "  Downloading pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 MB\u001b[0m \u001b[31m45.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.8.2)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from modelscope) (6.0.1)\n",
            "Requirement already satisfied: requests>=2.25 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.31.0)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.10.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from modelscope) (67.7.2)\n",
            "Collecting simplejson>=3.3.0 (from modelscope)\n",
            "  Downloading simplejson-3.19.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (137 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.9/137.9 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: sortedcontainers>=1.5.9 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.4.0)\n",
            "Requirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (4.66.1)\n",
            "Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.0.4)\n",
            "Collecting yapf (from modelscope)\n",
            "  Downloading yapf-0.40.1-py3-none-any.whl (250 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m250.3/250.3 kB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting dill<0.3.7,>=0.3.0 (from datasets<=2.13.0,>=2.8.0->modelscope)\n",
            "  Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting xxhash (from datasets<=2.13.0,>=2.8.0->modelscope)\n",
            "  Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting multiprocess (from datasets<=2.13.0,>=2.8.0->modelscope)\n",
            "  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (2023.6.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (3.8.5)\n",
            "Collecting huggingface-hub<1.0.0,>=0.11.0 (from datasets<=2.13.0,>=2.8.0->modelscope)\n",
            "  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m30.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (23.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.1->modelscope) (1.16.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (2023.7.22)\n",
            "Collecting crcmod>=1.7 (from oss2->modelscope)\n",
            "  Downloading crcmod-1.7.tar.gz (89 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.7/89.7 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting pycryptodome>=3.4.7 (from oss2->modelscope)\n",
            "  Downloading pycryptodome-3.18.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m90.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aliyun-python-sdk-kms>=2.4.1 (from oss2->modelscope)\n",
            "  Downloading aliyun_python_sdk_kms-2.16.1-py2.py3-none-any.whl (70 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.8/70.8 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aliyun-python-sdk-core>=2.13.12 (from oss2->modelscope)\n",
            "  Downloading aliyun-python-sdk-core-2.13.36.tar.gz (440 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.5/440.5 kB\u001b[0m \u001b[31m45.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->modelscope) (2023.3)\n",
            "Requirement already satisfied: importlib-metadata>=6.6.0 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (6.8.0)\n",
            "Requirement already satisfied: platformdirs>=3.5.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (3.10.0)\n",
            "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (2.0.1)\n",
            "Collecting jmespath<1.0.0,>=0.9.3 (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope)\n",
            "  Downloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)\n",
            "Requirement already satisfied: cryptography>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (41.0.3)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (6.0.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (4.0.3)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.9.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.4.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.3.1)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets<=2.13.0,>=2.8.0->modelscope) (4.7.1)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=6.6.0->yapf->modelscope) (3.16.2)\n",
            "INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.\n",
            "Collecting multiprocess (from datasets<=2.13.0,>=2.8.0->modelscope)\n",
            "  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m16.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (1.15.1)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (2.21)\n",
            "Building wheels for collected packages: oss2, aliyun-python-sdk-core, crcmod\n",
            "  Building wheel for oss2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for oss2: filename=oss2-2.18.1-py3-none-any.whl size=115175 sha256=2902a22b29235e69d55720bc93c3b6d72fccccbe9e1211538db502da0f129a3a\n",
            "  Stored in directory: /root/.cache/pip/wheels/54/b1/27/f9d5791a1f01ee0ad99c934aa0e1df3eaae375b5d5aa3da184\n",
            "  Building wheel for aliyun-python-sdk-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for aliyun-python-sdk-core: filename=aliyun_python_sdk_core-2.13.36-py3-none-any.whl size=533190 sha256=630d5bd7cc39123af8b9ce7e2b6d7ec588fbd60800db17c3b668f66edf8487c7\n",
            "  Stored in directory: /root/.cache/pip/wheels/c0/f4/0e/87c534857132bd3bd2c4465c0b15b4db650cf6c15a876bda34\n",
            "  Building wheel for crcmod (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for crcmod: filename=crcmod-1.7-cp310-cp310-linux_x86_64.whl size=31405 sha256=5969199ddbffe80016650574a05f5ba65cbc83bb839dc881302a238ea4b462f5\n",
            "  Stored in directory: /root/.cache/pip/wheels/85/4c/07/72215c529bd59d67e3dac29711d7aba1b692f543c808ba9e86\n",
            "Successfully built oss2 aliyun-python-sdk-core crcmod\n",
            "Installing collected packages: crcmod, addict, xxhash, simplejson, pycryptodome, pyarrow, jmespath, einops, dill, yapf, multiprocess, huggingface-hub, aliyun-python-sdk-core, datasets, aliyun-python-sdk-kms, oss2, modelscope\n",
            "  Attempting uninstall: pyarrow\n",
            "    Found existing installation: pyarrow 9.0.0\n",
            "    Uninstalling pyarrow-9.0.0:\n",
            "      Successfully uninstalled pyarrow-9.0.0\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed addict-2.4.0 aliyun-python-sdk-core-2.13.36 aliyun-python-sdk-kms-2.16.1 crcmod-1.7 datasets-2.13.0 dill-0.3.6 einops-0.6.1 huggingface-hub-0.16.4 jmespath-0.10.0 modelscope-1.8.4 multiprocess-0.70.14 oss2-2.18.1 pyarrow-13.0.0 pycryptodome-3.18.0 simplejson-3.19.1 xxhash-3.3.0 yapf-0.40.1\n",
            "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
            "Collecting tts-autolabel\n",
            "  Downloading https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/kantts/tts_autolabel-1.1.8-py3-none-any.whl (117 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.3/117.3 kB\u001b[0m \u001b[31m216.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.1+cu118)\n",
            "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.2+cu118)\n",
            "Collecting onnxruntime (from tts-autolabel)\n",
            "  Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.10.1)\n",
            "Collecting numpy<=1.23.1 (from tts-autolabel)\n",
            "  Downloading numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.0/17.0 MB\u001b[0m \u001b[31m54.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting sox (from tts-autolabel)\n",
            "  Downloading sox-1.4.1-py2.py3-none-any.whl (39 kB)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (6.0.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (4.66.1)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.10.1)\n",
            "Collecting nls-fa (from tts-autolabel)\n",
            "  Downloading https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/nls_fa-0.1-cp310-cp310-linux_x86_64.whl (1.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting kaldi-native-fbank (from tts-autolabel)\n",
            "  Downloading kaldi_native_fbank-1.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (210 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m210.5/210.5 kB\u001b[0m \u001b[31m23.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting typeguard<=2.13.3 (from tts-autolabel)\n",
            "  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)\n",
            "Collecting ttsfrd>=0.2.1 (from tts-autolabel)\n",
            "  Downloading https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/ttsfrd/linux/ttsfrd-0.2.1-cp310-cp310-linux_x86_64.whl (53.4 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.4/53.4 MB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (3.0.0)\n",
            "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.2.2)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.3.2)\n",
            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.4.2)\n",
            "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.56.4)\n",
            "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.12.1)\n",
            "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.7.0)\n",
            "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3.6)\n",
            "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.7.1)\n",
            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3)\n",
            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.0.5)\n",
            "Collecting coloredlogs (from onnxruntime->tts-autolabel)\n",
            "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.5.26)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.1)\n",
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (3.20.3)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (1.12)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.12.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (3.27.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (16.0.6)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (67.7.2)\n",
            "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (3.10.0)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (2.31.0)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->tts-autolabel) (3.2.0)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa->tts-autolabel) (1.15.1)\n",
            "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime->tts-autolabel)\n",
            "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->tts-autolabel) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime->tts-autolabel) (1.3.0)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa->tts-autolabel) (2.21)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2023.7.22)\n",
            "Installing collected packages: typeguard, ttsfrd, numpy, nls-fa, kaldi-native-fbank, humanfriendly, sox, coloredlogs, onnxruntime, tts-autolabel\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.23.5\n",
            "    Uninstalling numpy-1.23.5:\n",
            "      Successfully uninstalled numpy-1.23.5\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed coloredlogs-15.0.1 humanfriendly-10.0 kaldi-native-fbank-1.18.4 nls-fa-0.1 numpy-1.23.1 onnxruntime-1.15.1 sox-1.4.1 tts-autolabel-1.1.8 ttsfrd-0.2.1 typeguard-2.13.3\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "numpy"
                ]
              }
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting typeguard==2.3.1\n",
            "  Downloading typeguard-2.3.1-py3-none-any.whl (10 kB)\n",
            "Installing collected packages: typeguard\n",
            "  Attempting uninstall: typeguard\n",
            "    Found existing installation: typeguard 2.13.3\n",
            "    Uninstalling typeguard-2.13.3:\n",
            "      Successfully uninstalled typeguard-2.13.3\n",
            "Successfully installed typeguard-2.3.1\n",
            "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (1.4.1)\n",
            "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from sox) (1.23.1)\n",
            "Collecting bitstring\n",
            "  Downloading bitstring-4.1.1-py3-none-any.whl (56 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.8/56.8 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting bitarray<3.0.0,>=2.8.0 (from bitstring)\n",
            "  Downloading bitarray-2.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (286 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m286.2/286.2 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: bitarray, bitstring\n",
            "Successfully installed bitarray-2.8.1 bitstring-4.1.1\n",
            "Collecting pysptk\n",
            "  Downloading pysptk-0.2.1.tar.gz (419 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m419.7/419.7 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from pysptk) (1.10.1)\n",
            "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from pysptk) (4.4.2)\n",
            "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk) (0.29.36)\n",
            "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy->pysptk) (1.23.1)\n",
            "Building wheels for collected packages: pysptk\n",
            "  Building wheel for pysptk (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pysptk: filename=pysptk-0.2.1-cp310-cp310-linux_x86_64.whl size=1137273 sha256=b57158ca005bb75139e737d40ee28c7fb86b5e166def7734f2a1dfd5997e5812\n",
            "  Stored in directory: /root/.cache/pip/wheels/aa/96/e5/2348f6f568b9888d3505a97dc53287152ffd0440526cc6105d\n",
            "Successfully built pysptk\n",
            "Installing collected packages: pysptk\n",
            "Successfully installed pysptk-0.2.1\n",
            "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
            "Collecting kantts\n",
            "  Downloading https://modelscope.oss-cn-beijing.aliyuncs.com/releases/dependencies/kantts/kantts-1.0.1-py3-none-any.whl (147 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.2/147.2 kB\u001b[0m \u001b[31m212.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: librosa>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (0.10.1)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from kantts) (3.7.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from kantts) (1.23.1)\n",
            "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from kantts) (0.56.4)\n",
            "Collecting unidecode (from kantts)\n",
            "  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m235.9/235.9 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from kantts) (7.0.0)\n",
            "Requirement already satisfied: pywavelets>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
            "Requirement already satisfied: scikit-learn>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.2.2)\n",
            "Requirement already satisfied: scipy>=1.7.3 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.10.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kantts) (4.66.1)\n",
            "Requirement already satisfied: pysptk in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
            "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
            "Requirement already satisfied: ttsfrd in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
            "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (3.0.0)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.3.2)\n",
            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.4.2)\n",
            "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.12.1)\n",
            "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.7.0)\n",
            "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3.6)\n",
            "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.7.1)\n",
            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3)\n",
            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.0.5)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (67.7.2)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0.2->kantts) (3.2.0)\n",
            "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->kantts) (2.2.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.4.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (23.1)\n",
            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (9.4.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (2.8.2)\n",
            "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk->kantts) (0.29.36)\n",
            "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (3.10.0)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (2.31.0)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->kantts) (0.5.0)\n",
            "Requirement already satisfied: pydantic-core==2.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9.1->inflect->kantts) (2.6.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->kantts) (1.16.0)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa>=0.9.2->kantts) (1.15.1)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa>=0.9.2->kantts) (2.21)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2023.7.22)\n",
            "Installing collected packages: unidecode, kantts\n",
            "Successfully installed kantts-1.0.1 unidecode-1.3.6\n",
            "Collecting pytorch_wavelets\n",
            "  Downloading pytorch_wavelets-1.3.0-py3-none-any.whl (54 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.9/54.9 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.23.1)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.16.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (2.0.1+cu118)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.12.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (4.7.1)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (3.27.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (16.0.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch_wavelets) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch_wavelets) (1.3.0)\n",
            "Installing collected packages: pytorch_wavelets\n",
            "Successfully installed pytorch_wavelets-1.3.0\n",
            "Collecting tensorboardX\n",
            "  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (1.23.1)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (23.1)\n",
            "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (3.20.3)\n",
            "Installing collected packages: tensorboardX\n",
            "Successfully installed tensorboardX-2.6.2.2\n",
            "Cloning into 'pytorch_wavelets'...\n",
            "remote: Enumerating objects: 978, done.\u001b[K\n",
            "remote: Counting objects: 100% (142/142), done.\u001b[K\n",
            "remote: Compressing objects: 100% (96/96), done.\u001b[K\n",
            "remote: Total 978 (delta 79), reused 91 (delta 46), pack-reused 836\u001b[K\n",
            "Receiving objects: 100% (978/978), 6.80 MiB | 7.11 MiB/s, done.\n",
            "Resolving deltas: 100% (663/663), done.\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.4)\n",
            "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.23.1)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (23.1)\n",
            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (9.4.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (2.8.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
            "Collecting numpy==1.22.0\n",
            "  Downloading numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.8/16.8 MB\u001b[0m \u001b[31m84.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: numpy\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.23.1\n",
            "    Uninstalling numpy-1.23.1:\n",
            "      Successfully uninstalled numpy-1.23.1\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "librosa 0.10.1 requires numpy!=1.22.0,!=1.22.1,!=1.22.2,>=1.20.3, but you have numpy 1.22.0 which is incompatible.\n",
            "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\n",
            "plotnine 0.12.2 requires numpy>=1.23.0, but you have numpy 1.22.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed numpy-1.22.0\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "numpy"
                ]
              }
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!apt-get install sox"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "b4uSdumvUcQC",
        "outputId": "df09129e-5df0-44cf-bd46-d5589cb1b438"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Reading package lists... Done\n",
            "Building dependency tree... Done\n",
            "Reading state information... Done\n",
            "The following additional packages will be installed:\n",
            "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base\n",
            "  libsox3 libwavpack1\n",
            "Suggested packages:\n",
            "  libsox-fmt-all\n",
            "The following NEW packages will be installed:\n",
            "  libopencore-amrnb0 libopencore-amrwb0 libsox-fmt-alsa libsox-fmt-base\n",
            "  libsox3 libwavpack1 sox\n",
            "0 upgraded, 7 newly installed, 0 to remove and 16 not upgraded.\n",
            "Need to get 617 kB of archives.\n",
            "After this operation, 1,760 kB of additional disk space will be used.\n",
            "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrnb0 amd64 0.1.5-1 [94.8 kB]\n",
            "Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libopencore-amrwb0 amd64 0.1.5-1 [49.1 kB]\n",
            "Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox3 amd64 14.4.2+git20190427-2+deb11u2build0.22.04.1 [240 kB]\n",
            "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-alsa amd64 14.4.2+git20190427-2+deb11u2build0.22.04.1 [11.2 kB]\n",
            "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libwavpack1 amd64 5.4.0-1build2 [83.7 kB]\n",
            "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libsox-fmt-base amd64 14.4.2+git20190427-2+deb11u2build0.22.04.1 [33.7 kB]\n",
            "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 sox amd64 14.4.2+git20190427-2+deb11u2build0.22.04.1 [104 kB]\n",
            "Fetched 617 kB in 1s (580 kB/s)\n",
            "Selecting previously unselected package libopencore-amrnb0:amd64.\n",
            "(Reading database ... 120831 files and directories currently installed.)\n",
            "Preparing to unpack .../0-libopencore-amrnb0_0.1.5-1_amd64.deb ...\n",
            "Unpacking libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
            "Selecting previously unselected package libopencore-amrwb0:amd64.\n",
            "Preparing to unpack .../1-libopencore-amrwb0_0.1.5-1_amd64.deb ...\n",
            "Unpacking libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
            "Selecting previously unselected package libsox3:amd64.\n",
            "Preparing to unpack .../2-libsox3_14.4.2+git20190427-2+deb11u2build0.22.04.1_amd64.deb ...\n",
            "Unpacking libsox3:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Selecting previously unselected package libsox-fmt-alsa:amd64.\n",
            "Preparing to unpack .../3-libsox-fmt-alsa_14.4.2+git20190427-2+deb11u2build0.22.04.1_amd64.deb ...\n",
            "Unpacking libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Selecting previously unselected package libwavpack1:amd64.\n",
            "Preparing to unpack .../4-libwavpack1_5.4.0-1build2_amd64.deb ...\n",
            "Unpacking libwavpack1:amd64 (5.4.0-1build2) ...\n",
            "Selecting previously unselected package libsox-fmt-base:amd64.\n",
            "Preparing to unpack .../5-libsox-fmt-base_14.4.2+git20190427-2+deb11u2build0.22.04.1_amd64.deb ...\n",
            "Unpacking libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Selecting previously unselected package sox.\n",
            "Preparing to unpack .../6-sox_14.4.2+git20190427-2+deb11u2build0.22.04.1_amd64.deb ...\n",
            "Unpacking sox (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Setting up libsox3:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Setting up libopencore-amrwb0:amd64 (0.1.5-1) ...\n",
            "Setting up libsox-fmt-alsa:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Setting up libwavpack1:amd64 (5.4.0-1build2) ...\n",
            "Setting up libopencore-amrnb0:amd64 (0.1.5-1) ...\n",
            "Setting up libsox-fmt-base:amd64 (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Setting up sox (14.4.2+git20190427-2+deb11u2build0.22.04.1) ...\n",
            "Processing triggers for man-db (2.10.2-1) ...\n",
            "Processing triggers for libc-bin (2.35-0ubuntu3.1) ...\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n",
            "\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n",
            "\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n",
            "\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n",
            "\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n",
            "\n",
            "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cd pytorch_wavelets"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "A5mtXiAEUeOi",
        "outputId": "e777914d-f3a1-4dfe-b0f9-b44a49b1bd21"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/pytorch_wavelets\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "pip install ."
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0279o9cEUgYo",
        "outputId": "db8ed7e3-834e-4624-b7c2-44291426a63a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Processing /content/pytorch_wavelets\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.22.0)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.16.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (2.0.1+cu118)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.12.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (4.7.1)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (3.27.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (16.0.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch-wavelets==1.3.0) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch-wavelets==1.3.0) (1.3.0)\n",
            "Building wheels for collected packages: pytorch-wavelets\n",
            "  Building wheel for pytorch-wavelets (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pytorch-wavelets: filename=pytorch_wavelets-1.3.0-py3-none-any.whl size=54852 sha256=6db8460229d2f3b684b7c3e8ef429bb0dee02914a30baba28db7994e49ea6ae9\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-2bodt4br/wheels/f6/77/89/ac0462961f441a5dd4d226216613ef64f3eb2c7f2883efcebf\n",
            "Successfully built pytorch-wavelets\n",
            "Installing collected packages: pytorch-wavelets\n",
            "  Attempting uninstall: pytorch-wavelets\n",
            "    Found existing installation: pytorch-wavelets 1.3.0\n",
            "    Uninstalling pytorch-wavelets-1.3.0:\n",
            "      Successfully uninstalled pytorch-wavelets-1.3.0\n",
            "Successfully installed pytorch-wavelets-1.3.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "os._exit(00) # 重启notebook"
      ],
      "metadata": {
        "id": "lO29uxHrVafx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import sox"
      ],
      "metadata": {
        "id": "UsOLbeoYFO04"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 对音频切片处理"
      ],
      "metadata": {
        "id": "YZqH0DtGcsXJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import subprocess\n",
        "from pathlib import Path\n",
        "import librosa\n",
        "from scipy.io import wavfile\n",
        "import numpy as np\n",
        "import torch\n",
        "import csv\n",
        "import whisper\n",
        "\n",
        "def split_long_audio(model, filepaths, save_dir=\"data_dir\", out_sr=44100):\n",
        "    if isinstance(filepaths, str):\n",
        "        filepaths = [filepaths]\n",
        "\n",
        "    for file_idx, filepath in enumerate(filepaths):\n",
        "\n",
        "        save_path = Path(save_dir)\n",
        "        save_path.mkdir(exist_ok=True, parents=True)\n",
        "\n",
        "        print(f\"Transcribing file {file_idx}: '{filepath}' to segments...\")\n",
        "        result = model.transcribe(filepath, word_timestamps=True, task=\"transcribe\", beam_size=5, best_of=5)\n",
        "        segments = result['segments']\n",
        "\n",
        "        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)\n",
        "        wav, _ = librosa.effects.trim(wav, top_db=20)\n",
        "        peak = np.abs(wav).max()\n",
        "        if peak > 1.0:\n",
        "            wav = 0.98 * wav / peak\n",
        "        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)\n",
        "        wav2 /= max(wav2.max(), -wav2.min())\n",
        "\n",
        "        for i, seg in enumerate(segments):\n",
        "            start_time = seg['start']\n",
        "            end_time = seg['end']\n",
        "            wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)]\n",
        "            wav_seg_name = f\"{file_idx}_{i}.wav\"\n",
        "            out_fpath = save_path / wav_seg_name\n",
        "            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16))"
      ],
      "metadata": {
        "id": "NdoD-ZnIaWhN"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "whisper_size = \"medium\"\n",
        "whisper_model = whisper.load_model(whisper_size)"
      ],
      "metadata": {
        "id": "yXrxpjEGaWlT",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5fa21ff1-9b8b-4e5a-c45f-c97951ea12e2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████████████████████████████████| 1.42G/1.42G [00:08<00:00, 190MiB/s]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 需要新建两个文件夹，分别是：\"output_training_data\", \"pretrain_work_dir\""
      ],
      "metadata": {
        "id": "ccQC4ZcWbtXy"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from modelscope.tools import run_auto_label\n",
        "\n",
        "import os\n",
        "from modelscope.models.audio.tts import SambertHifigan\n",
        "from modelscope.pipelines import pipeline\n",
        "from modelscope.utils.constant import Tasks\n",
        "\n",
        "from modelscope.metainfo import Trainers\n",
        "from modelscope.trainers import build_trainer\n",
        "from modelscope.utils.audio.audio_utils import TtsTrainType\n",
        "\n",
        "pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'\n",
        "\n",
        "dataset_id = \"./output_training_data/\"\n",
        "pretrain_work_dir = \"./pretrain_work_dir/\"\n",
        "\n"
      ],
      "metadata": {
        "id": "X6U1kIM8ouSs",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7edf72aa-df6e-465c-90a8-4114beb17112"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2023-08-31 07:49:43,205 - modelscope - INFO - PyTorch version 2.0.1+cu118 Found.\n",
            "2023-08-31 07:49:43,208 - modelscope - INFO - TensorFlow version 2.12.0 Found.\n",
            "2023-08-31 07:49:43,209 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n",
            "2023-08-31 07:49:43,212 - modelscope - INFO - No valid ast index found from /root/.cache/modelscope/ast_indexer, generating ast index from prebuilt!\n",
            "2023-08-31 07:49:43,268 - modelscope - INFO - Loading done! Current index file version is 1.8.4, with md5 0b679d299d0b8626e3c5221da77c0ae7 and a total number of 902 components indexed\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def auto_label(audio):\n",
        "  try:\n",
        "    split_long_audio(whisper_model, audio, \"test_wavs\")\n",
        "    os.makedirs(\"output_training_data\", exist_ok=True)\n",
        "    input_wav = \"./test_wavs/\"\n",
        "    output_data = \"./output_training_data/\"\n",
        "    ret, report = run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision=\"v1.0.7\")\n",
        "\n",
        "  except Exception:\n",
        "    pass\n",
        "  return \"标注成功\""
      ],
      "metadata": {
        "id": "uq5Jdd7umcvj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def train(a):\n",
        "  try:\n",
        "    os.makedirs(\"pretrain_work_dir\", exist_ok=True)\n",
        "\n",
        "    train_info = {\n",
        "        TtsTrainType.TRAIN_TYPE_SAMBERT: {  # 配置训练AM（sambert）模型\n",
        "            'train_steps': 52,               # 训练多少个step\n",
        "            'save_interval_steps': 50,       # 每训练多少个step保存一次checkpoint\n",
        "            'log_interval': 10               # 每训练多少个step打印一次训练日志\n",
        "        }\n",
        "    }\n",
        "\n",
        "    # 配置训练参数，指定数据集，临时工作目录和train_info\n",
        "    kwargs = dict(\n",
        "        model=pretrained_model_id,                  # 指定要finetune的模型\n",
        "        model_revision = \"v1.0.6\",\n",
        "        work_dir=pretrain_work_dir,                 # 指定临时工作目录\n",
        "        train_dataset=dataset_id,                   # 指定数据集id\n",
        "        train_type=train_info                       # 指定要训练类型及参数\n",
        "    )\n",
        "\n",
        "    trainer = build_trainer(Trainers.speech_kantts_trainer,\n",
        "                            default_args=kwargs)\n",
        "\n",
        "    trainer.train()\n",
        "\n",
        "  except Exception:\n",
        "    pass\n",
        "\n",
        "  return \"训练完成\"\n"
      ],
      "metadata": {
        "id": "36jXpTLyoO3s"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import random\n",
        "\n",
        "def infer(text):\n",
        "\n",
        "  model_dir = os.path.abspath(\"./pretrain_work_dir\")\n",
        "\n",
        "  custom_infer_abs = {\n",
        "      'voice_name':\n",
        "      'F7',\n",
        "      'am_ckpt':\n",
        "      os.path.join(model_dir, 'tmp_am', 'ckpt'),\n",
        "      'am_config':\n",
        "      os.path.join(model_dir, 'tmp_am', 'config.yaml'),\n",
        "      'voc_ckpt':\n",
        "      os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),\n",
        "      'voc_config':\n",
        "      os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan',\n",
        "              'config.yaml'),\n",
        "      'audio_config':\n",
        "      os.path.join(model_dir, 'data', 'audio_config.yaml'),\n",
        "      'se_file':\n",
        "      os.path.join(model_dir, 'data', 'se', 'se.npy')\n",
        "  }\n",
        "  kwargs = {'custom_ckpt': custom_infer_abs}\n",
        "\n",
        "  model_id = SambertHifigan(os.path.join(model_dir, \"orig_model\"), **kwargs)\n",
        "\n",
        "  inference = pipeline(task=Tasks.text_to_speech, model=model_id)\n",
        "  output = inference(input=text)\n",
        "\n",
        "  filename = str(random.randint(1, 1000000000000))\n",
        "\n",
        "  with open(filename + \"myfile.wav\", mode='bx') as f:\n",
        "      f.write(output[\"output_wav\"])\n",
        "  return filename + \"myfile.wav\"\n"
      ],
      "metadata": {
        "id": "qw7KYommooFU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "pip install gradio"
      ],
      "metadata": {
        "id": "GNJhguT6q4ej",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "cba0f880-4dc5-40a1-9fa5-2d2c42b556a4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting gradio\n",
            "  Downloading gradio-3.41.2-py3-none-any.whl (20.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.1/20.1 MB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)\n",
            "  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n",
            "Collecting fastapi (from gradio)\n",
            "  Downloading fastapi-0.103.0-py3-none-any.whl (66 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.2/66.2 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting ffmpy (from gradio)\n",
            "  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting gradio-client==0.5.0 (from gradio)\n",
            "  Downloading gradio_client-0.5.0-py3-none-any.whl (298 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.2/298.2 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting httpx (from gradio)\n",
            "  Downloading httpx-0.24.1-py3-none-any.whl (75 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.16.4)\n",
            "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
            "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n",
            "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.3)\n",
            "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
            "Requirement already satisfied: numpy~=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.0)\n",
            "Collecting orjson~=3.0 (from gradio)\n",
            "  Downloading orjson-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (139 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.9/139.9 kB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (23.1)\n",
            "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n",
            "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n",
            "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.2.1)\n",
            "Collecting pydub (from gradio)\n",
            "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
            "Collecting python-multipart (from gradio)\n",
            "  Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
            "Requirement already satisfied: requests~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.31.0)\n",
            "Collecting semantic-version~=2.0 (from gradio)\n",
            "  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.7.1)\n",
            "Collecting uvicorn>=0.14.0 (from gradio)\n",
            "  Downloading uvicorn-0.23.2-py3-none-any.whl (59 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting websockets<12.0,>=10.0 (from gradio)\n",
            "  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.5.0->gradio) (2023.6.0)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n",
            "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.0)\n",
            "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.2)\n",
            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.66.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.4)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.3)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4->gradio) (0.5.0)\n",
            "Requirement already satisfied: pydantic-core==2.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4->gradio) (2.6.1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2023.7.22)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.7)\n",
            "Collecting h11>=0.8 (from uvicorn>=0.14.0->gradio)\n",
            "  Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting starlette<0.28.0,>=0.27.0 (from fastapi->gradio)\n",
            "  Downloading starlette-0.27.0-py3-none-any.whl (66 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting httpcore<0.18.0,>=0.15.0 (from httpx->gradio)\n",
            "  Downloading httpcore-0.17.3-py3-none-any.whl (74 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n",
            "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from httpcore<0.18.0,>=0.15.0->httpx->gradio) (3.7.1)\n",
            "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.1.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.7.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.30.2)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.9.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n",
            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->httpcore<0.18.0,>=0.15.0->httpx->gradio) (1.1.3)\n",
            "Building wheels for collected packages: ffmpy\n",
            "  Building wheel for ffmpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for ffmpy: filename=ffmpy-0.3.1-py3-none-any.whl size=5579 sha256=050e26280ad1f47f4cc51ef4bd0044dadd4befcb088f21983acc55ff71a10c87\n",
            "  Stored in directory: /root/.cache/pip/wheels/01/a6/d1/1c0828c304a4283b2c1639a09ad86f83d7c487ef34c6b4a1bf\n",
            "Successfully built ffmpy\n",
            "Installing collected packages: pydub, ffmpy, websockets, semantic-version, python-multipart, orjson, h11, aiofiles, uvicorn, starlette, httpcore, httpx, fastapi, gradio-client, gradio\n",
            "Successfully installed aiofiles-23.2.1 fastapi-0.103.0 ffmpy-0.3.1 gradio-3.41.2 gradio-client-0.5.0 h11-0.14.0 httpcore-0.17.3 httpx-0.24.1 orjson-3.9.5 pydub-0.25.1 python-multipart-0.0.6 semantic-version-2.10.0 starlette-0.27.0 uvicorn-0.23.2 websockets-11.0.3\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import gradio as gr"
      ],
      "metadata": {
        "id": "e30tBe6vtzGU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "app = gr.Blocks()\n",
        "\n",
        "with app:\n",
        "    gr.Markdown(\"# <center>🥳🎶🎡 - Sambert中文声音克隆</center>\")\n",
        "    gr.Markdown(\"## <center>🌟 - 训练3分钟，推理5秒钟，中英自然发音 </center>\")\n",
        "    gr.Markdown(\"### <center>🌊 - 更多精彩应用，敬请关注[滔滔AI](http://www.talktalkai.com)；滔滔AI，为爱滔滔！💕</center>\")\n",
        "\n",
        "    with gr.Row():\n",
        "      inp1 = gr.Audio(type=\"filepath\", label=\"请上传一段音频\")\n",
        "      out1 = gr.Textbox(label=\"标注情况\", lines=1, interactive=False)\n",
        "\n",
        "      out2 = gr.Textbox(label=\"训练情况\", lines=1, interactive=False)\n",
        "      inp2 = gr.Textbox(label=\"文本\", lines=3)\n",
        "      out3 = gr.Audio(type=\"filepath\", label=\"合成的音频\")\n",
        "      btn1 = gr.Button(\"标注数据\")\n",
        "      btn2 = gr.Button(\"训练\")\n",
        "      btn3 = gr.Button(\"推理\")\n",
        "\n",
        "      btn1.click(auto_label, inp1, out1)\n",
        "      btn2.click(train, out1, out2)\n",
        "      btn3.click(infer, inp2, out3)\n",
        "\n",
        "    gr.Markdown(\"### <center>注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。</center>\")\n",
        "    gr.HTML('''\n",
        "        <div class=\"footer\">\n",
        "                    <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘\n",
        "                    </p>\n",
        "        </div>\n",
        "    ''')\n",
        "app.launch(show_error=True)"
      ],
      "metadata": {
        "id": "P03LT9IOrq2C",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 617
        },
        "outputId": "1a0453cf-b81e-442b-e1fe-130dea4be07a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
            "Note: opening Chrome Inspector may crash demo inside Colab notebooks.\n",
            "\n",
            "To create a public link, set `share=True` in `launch()`.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "(async (port, path, width, height, cache, element) => {\n",
              "                        if (!google.colab.kernel.accessAllowed && !cache) {\n",
              "                            return;\n",
              "                        }\n",
              "                        element.appendChild(document.createTextNode(''));\n",
              "                        const url = await google.colab.kernel.proxyPort(port, {cache});\n",
              "\n",
              "                        const external_link = document.createElement('div');\n",
              "                        external_link.innerHTML = `\n",
              "                            <div style=\"font-family: monospace; margin-bottom: 0.5rem\">\n",
              "                                Running on <a href=${new URL(path, url).toString()} target=\"_blank\">\n",
              "                                    https://localhost:${port}${path}\n",
              "                                </a>\n",
              "                            </div>\n",
              "                        `;\n",
              "                        element.appendChild(external_link);\n",
              "\n",
              "                        const iframe = document.createElement('iframe');\n",
              "                        iframe.src = new URL(path, url).toString();\n",
              "                        iframe.height = height;\n",
              "                        iframe.allow = \"autoplay; camera; microphone; clipboard-read; clipboard-write;\"\n",
              "                        iframe.width = width;\n",
              "                        iframe.style.border = 0;\n",
              "                        element.appendChild(iframe);\n",
              "                    })(7860, \"/\", \"100%\", 500, false, window.element)"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": []
          },
          "metadata": {},
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#infer(\"欢迎使用滔滔智能的声音克隆产品\")"
      ],
      "metadata": {
        "id": "YCm0L5U9tmP9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "onmCp6Ou2Lqb"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}